#########################################
######### Balancing Plots ############
####   Appendix D - Figures D1A and D1B  #####
########   October 10, 2018 ######
####  CHecked the rerun: Dec 16, 2022 ####

rm(list=ls())
library(stargazer)
library(ggplot2)
library(foreign)
######GEO DATA####
# setwd("~/Dropbox/Personal Research 2017")


#########################################
##### Create  Bandwidths ###############
geo2001=read.csv("~/Dropbox/Personal Research 2017/replications/geocensus2001_nov16.csv")

#Distance to Mysore-Bombay Border
rd10.mb=geo2001[which(geo2001$NEAR_DIST_border1<10000),] #20 km
rd25.mb=geo2001[which(geo2001$NEAR_DIST_border1<25000),] #50 km
rd50.mb=geo2001[which(geo2001$NEAR_DIST_border1<50000),] #100 km
rd100.mb=geo2001[which(geo2001$NEAR_DIST_border1<100000),] #200 km

table(rd10.mb$border1)

#Distance to Hyderabad-Bombay Border
rd10.hb=geo2001[which(geo2001$NEAR_DIST_border2<10000),] #20 km
rd25.hb=geo2001[which(geo2001$NEAR_DIST_border2<25000),] #50 km
rd50.hb=geo2001[which(geo2001$NEAR_DIST_border2<50000),] #100 km
rd100.hb=geo2001[which(geo2001$NEAR_DIST_border2<100000),] #200 km
table(rd10.hb$border2)


#Balancing and Different Borders 
#Mysore- Bombay
#the whole data 
t1=t.test(geo2001$TOT_POP[geo2001$border1==1], geo2001$TOT_POP[geo2001$border1==0])
t2=t.test(geo2001$TOT_SC[geo2001$border1==1], geo2001$TOT_SC[geo2001$border1==0])
t3=t.test(geo2001$TOT_ST[geo2001$border1==1], geo2001$TOT_ST[geo2001$border1==0])
t5=t.test(geo2001$Slope[geo2001$border1==1], geo2001$Slope[geo2001$border1==0])
t6=t.test(geo2001$TerrainRug[geo2001$border1==1], geo2001$TerrainRug[geo2001$border1==0])
est=rbind(t1$estimate, t2$estimate, t3$estimate,  t5$estimate, t6$estimate)
pval=rbind(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value)
geo=cbind(est, pval)
rownames(geo)=c("Total Population", 
                "Total Scheduled Castes Pop",
                "Total Scheduled Tribes Pop", 
                "Slope", "Terrain Rugness")
colnames(geo)=c("Mean Tr", "Mean Cont", "T-Test P.Value")
stargazer(geo, digits = 3)


t0=as.matrix(c(t1$stat, t2$stat, t3$stat, t5$stat, t6$stat))
t0

p0=as.matrix(c(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value))
p0

#25 km Mysore-Bombay
t1=t.test(rd25.mb$TOT_POP[rd25.mb$border1==1], rd25.mb$TOT_POP[rd25.mb$border1==0])
t2=t.test(rd25.mb$TOT_SC[rd25.mb$border1==1], rd25.mb$TOT_SC[rd25.mb$border1==0])
t3=t.test(rd25.mb$TOT_ST[rd25.mb$border1==1], rd25.mb$TOT_ST[rd25.mb$border1==0])
t5=t.test(rd25.mb$Slope[rd25.mb$border1==1], rd25.mb$Slope[rd25.mb$border1==0])
t6=t.test(rd25.mb$TerrainRug[rd25.mb$border1==1], rd25.mb$TerrainRug[rd25.mb$border1==0])
est=rbind(t1$estimate, t2$estimate, t3$estimate,  t5$estimate, t6$estimate)
pval=rbind(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value)
mb.25=cbind(est, pval)
rownames(mb.25)=c("Total Population", 
                  "Total Scheduled Castes Pop",
                  "Total Scheduled Tribes Pop", 
                  "Slope", "Terrain Rugness")
colnames(mb.25)=c("Mean Tr", "Mean Cont", "T-Test P.Value")
stargazer(mb.25, digits = 3)

t25=as.matrix(c(t1$stat, t2$stat, t3$stat, t5$stat, t6$stat))
t25

p25=as.matrix(c(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value))
p25


#50 km Mysore-Bombay
t1=t.test(rd50.mb$TOT_POP[rd50.mb$border1==1], rd50.mb$TOT_POP[rd50.mb$border1==0])
t2=t.test(rd50.mb$TOT_SC[rd50.mb$border1==1], rd50.mb$TOT_SC[rd50.mb$border1==0])
t3=t.test(rd50.mb$TOT_ST[rd50.mb$border1==1], rd50.mb$TOT_ST[rd50.mb$border1==0])
t5=t.test(rd50.mb$Slope[rd50.mb$border1==1], rd50.mb$Slope[rd50.mb$border1==0])
t6=t.test(rd50.mb$TerrainRug[rd50.mb$border1==1], rd50.mb$TerrainRug[rd50.mb$border1==0])
est=rbind(t1$estimate, t2$estimate, t3$estimate,  t5$estimate, t6$estimate)
pval=rbind(t1$p.value, t2$p.value, t3$p.value,  t5$p.value, t6$p.value)
mb.50=cbind(est, pval)
rownames(mb.50)=c("Total Population", 
                  "Total Scheduled Castes Pop",
                  "Total Scheduled Tribes Pop", 
                  "Slope", "Terrain Rugness")
colnames(mb.50)=c("Mean Tr", "Mean Cont", "T-Test P.Value")
stargazer(mb.50, digits = 3)

t50=as.matrix(c(t1$stat, t2$stat, t3$stat, t5$stat, t6$stat))
t50

p50=as.matrix(c(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value))
p50


#10 km Mysore-Bombay
t1=t.test(rd10.mb$TOT_POP[rd10.mb$border1==1], rd10.mb$TOT_POP[rd10.mb$border1==0])
t2=t.test(rd10.mb$TOT_SC[rd10.mb$border1==1], rd10.mb$TOT_SC[rd10.mb$border1==0])
t3=t.test(rd10.mb$TOT_ST[rd10.mb$border1==1], rd10.mb$TOT_ST[rd10.mb$border1==0])
t5=t.test(rd10.mb$Slope[rd10.mb$border1==1], rd10.mb$Slope[rd10.mb$border1==0])
t6=t.test(rd10.mb$TerrainRug[rd10.mb$border1==1], rd10.mb$TerrainRug[rd10.mb$border1==0])
est=rbind(t1$estimate, t2$estimate, t3$estimate, t5$estimate, t6$estimate)
pval=rbind(t1$p.value, t2$p.value, t3$p.value,  t5$p.value, t6$p.value)
mb.10=cbind(est, pval)
rownames(mb.10)=c("Total Population", 
                  "Total Scheduled Castes Pop",
                  "Total Scheduled Tribes Pop", 
                  "Slope", "Terrain Rugness")
colnames(mb.10)=c("Mean Tr", "Mean Cont", "T-Test P.Value")
stargazer(mb.10, digits = 3)

t10=as.matrix(c(t1$stat, t2$stat, t3$stat, t5$stat, t6$stat))
t10

p10=as.matrix(c(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value))
p10




tmys=cbind(p0, p10, p25, p50)
rownames(tmys)=c("Population", "SC Population", "ST Population", 
                 "Slope", "Terrain Ruggedness")
colnames(tmys)=c("no bw", "20 km", "50 km", "100 km")
tmys=as.data.frame(tmys)



#plots of balancing 
plot_mys=ggplot(tmys, aes(y=rownames(tmys), x=value, shape=Bandwidth))+
  geom_point(aes(x=tmys$`no bw`, shape="no bw"))+
  geom_point(aes(x=tmys$`20 km`, shape="20 km"))+
  geom_point(aes(x=tmys$`50 km`, shape="50 km"))+
  geom_point(aes(x=tmys$`100 km`, shape="100 km"))+
  geom_vline(xintercept=(0.05), linetype="dashed")+
  xlab("p-value")+ylab("Covariates")

plot_mys

pdf("mys_balance_vol2.pdf",width=7,height=5)
par(mfrow=c(3,2), mai=c(.85,.85,.2,.2)) 
plot_mys
dev.off()

##############################################
######Hyderabad-Bombay Border

#the whole data
t1=t.test(geo2001$TOT_POP[geo2001$border2==1], geo2001$TOT_POP[geo2001$border2==0])
t2=t.test(geo2001$TOT_SC[geo2001$border2==1], geo2001$TOT_SC[geo2001$border2==0])
t3=t.test(geo2001$TOT_ST[geo2001$border2==1], geo2001$TOT_ST[geo2001$border2==0])
t5=t.test(geo2001$Slope[geo2001$border2==1], geo2001$Slope[geo2001$border2==0])
t6=t.test(geo2001$TerrainRug[geo2001$border2==1], geo2001$TerrainRug[geo2001$border2==0])
est=rbind(t1$estimate, t2$estimate, t3$estimate,t5$estimate, t6$estimate)
pval=rbind(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value)
geoh=cbind(est, pval)
rownames(geoh)=c("Total Population", 
                 "Total Scheduled Castes Pop",
                 "Total Scheduled Tribes Pop",
                 "Slope", "Terrain Rugness")
colnames(geoh)=c("Mean Tr", "Mean Cont", "T-Test P.Value")
stargazer(geoh, digits = 3)



t0=as.matrix(c(t1$stat, t2$stat, t3$stat, t5$stat, t6$stat))
t0

p0=as.matrix(c(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value))
p0



#25 km Hyderabad-Bombay
t1=t.test(rd25.hb$TOT_POP[rd25.hb$border2==1], rd25.hb$TOT_POP[rd25.hb$border2==0])
t2=t.test(rd25.hb$TOT_SC[rd25.hb$border2==1], rd25.hb$TOT_SC[rd25.hb$border2==0])
t3=t.test(rd25.hb$TOT_ST[rd25.hb$border2==1], rd25.hb$TOT_ST[rd25.hb$border2==0])
t5=t.test(rd25.hb$Slope[rd25.hb$border2==1], rd25.hb$Slope[rd25.hb$border2==0])
t6=t.test(rd25.hb$TerrainRug[rd25.hb$border2==1], rd25.hb$TerrainRug[rd25.hb$border2==0])
est=rbind(t1$estimate, t2$estimate, t3$estimate, t5$estimate, t6$estimate)
pval=rbind(t1$p.value, t2$p.value, t3$p.value,  t5$p.value, t6$p.value)
hb.25=cbind(est, pval)
rownames(hb.25)=c("Total Population", 
                  "Total Scheduled Castes Pop",
                  "Total Scheduled Tribes Pop", 
                  "Slope", "Terrain Rugness")
colnames(hb.25)=c("Mean Tr", "Mean Cont", "T-Test P.Value")
stargazer(hb.25, digits = 3)

t25=as.matrix(c(t1$stat, t2$stat, t3$stat, t5$stat, t6$stat))
t25

p25=as.matrix(c(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value))
p25



#50 km Hyderabad-Bombay
t1=t.test(rd50.hb$TOT_POP[rd50.hb$border2==1], rd50.hb$TOT_POP[rd50.hb$border2==0])
t2=t.test(rd50.hb$TOT_SC[rd50.hb$border2==1], rd50.hb$TOT_SC[rd50.hb$border2==0])
t3=t.test(rd50.hb$TOT_ST[rd50.hb$border2==1], rd50.hb$TOT_ST[rd50.hb$border2==0])
t5=t.test(rd50.hb$Slope[rd50.hb$border2==1], rd50.hb$Slope[rd50.hb$border2==0])
t6=t.test(rd50.hb$TerrainRug[rd50.hb$border2==1], rd50.hb$TerrainRug[rd25.hb$border2==0])
est=rbind(t1$estimate, t2$estimate, t3$estimate,  t5$estimate, t6$estimate)
pval=rbind(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value)
hb.50=cbind(est, pval)
rownames(hb.50)=c("Total Population", 
                  "Total Scheduled Castes Pop",
                  "Total Scheduled Tribes Pop", 
                  "Slope", "Terrain Rugness")
colnames(hb.50)=c("Mean Tr", "Mean Cont", "T-Test P.Value")
stargazer(hb.50, digits = 3)

t50=as.matrix(c(t1$stat, t2$stat, t3$stat, t5$stat, t6$stat))
t50

p50=as.matrix(c(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value))
p50


#10 km Hyderabad-Bombay
t1=t.test(rd10.hb$TOT_POP[rd10.hb$border2==1], rd10.hb$TOT_POP[rd10.hb$border2==0])
t2=t.test(rd10.hb$TOT_SC[rd10.hb$border2==1], rd10.hb$TOT_SC[rd10.hb$border2==0])
t3=t.test(rd10.hb$TOT_ST[rd10.hb$border2==1], rd10.hb$TOT_ST[rd10.hb$border2==0])
t5=t.test(rd10.hb$Slope[rd10.hb$border2==1], rd10.hb$Slope[rd10.hb$border2==0])
t6=t.test(rd10.hb$TerrainRug[rd10.hb$border2==1], rd10.hb$TerrainRug[rd10.hb$border2==0])
est=rbind(t1$estimate, t2$estimate, t3$estimate, t5$estimate, t6$estimate)
pval=rbind(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value)
hb.10=cbind(est, pval)
rownames(hb.10)=c("Total Population", 
                  "Total Scheduled Castes Pop",
                  "Total Scheduled Tribes Pop", 
                  "Slope", "Terrain Rugness")
colnames(hb.10)=c("Mean Tr", "Mean Cont", "T-Test P.Value")
stargazer(hb.10, digits = 3)

t10=as.matrix(c(t1$stat, t2$stat, t3$stat, t5$stat, t6$stat))
t10

p10=as.matrix(c(t1$p.value, t2$p.value, t3$p.value, t5$p.value, t6$p.value))
p10


thyd=cbind(p0, p10, p25, p50)
rownames(thyd)=c("Population", "SC Population", "ST Population",
                 "Slope", "Terrain Ruggedness")
colnames(thyd)=c("no bw", "20 km", "50 km", "100 km")
thyd=as.data.frame(thyd)



plot_hyd=ggplot(thyd, aes(y=rownames(thyd), x=value, shape=Bandwidth))+
  geom_point(aes(x=thyd$`no bw`, shape="no bw"))+
  geom_point(aes(x=thyd$`20 km`, shape="20 km"))+
  geom_point(aes(x=thyd$`50 km`, shape="50 km"))+
  geom_point(aes(x=thyd$`100 km`, shape="100 km"))+
  geom_vline(xintercept=(0.05), linetype="dashed")+
  xlab("p-value")+ylab("Covariates")

plot_hyd

pdf("hyd_balance_vol2.pdf",width=7,height=5)
par(mfrow=c(3,2), mai=c(.85,.85,.2,.2)) 
plot_hyd
dev.off()
